Antibiotics gene linkage graph

Load generic libraries

source('configuration.r')

Load plot specific libraries

library(foreach)
library(readr)
suppressMessages(library(igraph))
library(ggraph)
library(ggalluvial)
library(ggnewscale)

Create graph edge data

dat <- read.table('../tables/antibiotics_gene_linkage.tsv', head=FALSE, stringsAsFactors = FALSE) %>% 
  mutate(id=paste0(V1,':',V2, ':', V3)) %>% 
  mutate(V6=str_replace(V6, 'PheCmlA5', 'Phe'))

edges.full <- foreach(id=unique(dat$id), .combine=rbind) %do% {
    tmp <- dat[dat$id == id, ]
    if(nrow(tmp) < 2) {
        NULL
    }else{
        edge <- data.frame(t(combn(sort(tmp$V6), 2)), id=id)
        edge$dist <- foreach(r=1:nrow(edge), .combine = c) %do% {
          abs(rowMeans(tmp[tmp$V6 == edge[r, ]$X1, 4:5]) - rowMeans(tmp[tmp$V6 == edge[r, ]$X2, 4:5]))
        }
        edge
    }
}
## save for ad hoc analysis
write.table(edges.full, '../output_tables/ar_gene_linkage_edge_list_uncollapsed.tsv', row.names = F, quote=F, sep='\t')

Function for ploting

get_graph_data <- function(pattern='', reverse=FALSE){
  if(reverse){
    edges.collapsed <- filter(edges.full, !str_detect(id, pattern))
  }else{
    edges.collapsed <- filter(edges.full, str_detect(id, pattern)) 
  }
    
  ## filtering and collapse into edge weights
   edges.collapsed <- group_by(edges.collapsed, X1, X2) %>% 
    tally() %>% 
    filter(n>2)   ### keep edges with more than 2 support
  
  ## Run MCL graph clustering
  write.table(edges.collapsed, 'tmp_edges.tsv', quote=F, sep='\t', col.names = F, row.names = F)
  system('/mnt/software/bin/mcl tmp_edges.tsv  --abc -o tmp_mcl.tsv -overlap keep -I 3 2>/dev/null')
  mcl <- read_tsv('tmp_mcl.tsv', col_names = F)
  grp <- foreach(c=1:nrow(mcl), .combine = 'rbind') %do%{
    tmp <- as.character(na.exclude(as.character(mcl[c,])))
    data.frame(vertex=tmp, grp=c, stringsAsFactors = F)
  } %>% data.frame(row.names = 1)
  grp$grp[grp$grp >= (which(table(grp) < 3)[1])] <- NA ## no annotation for clusters with 2 genes only
  
  ## generate graph data
  g <- graph_from_data_frame(edges.collapsed, directed=FALSE)
  ## vertices
  colors <- pal_simpsons('springfield')(16)
  n.colors <- colors[as.numeric(as.factor(str_replace(V(g)$name, '.*_', '')))]
  V(g)$color <- n.colors
  V(g)$community <- grp[V(g)$name,]##cluster_walktrap(g)$membership
  V(g)$Type <- str_replace(V(g)$name, '.*_', '')
  ## Edges
  E(g)$width <- edges.collapsed$n
  E(g)$community <-
    apply(as.data.frame(get.edgelist(g, names=FALSE)), 1,
                      function(x)
                        ifelse(V(g)$community[x[1]] == V(g)$community[x[2]],
                                         LETTERS[V(g)$community[x[1]]], NA))
  ## format vertex names
  aux <- function(x){
      if(length(x) > 2){
          paste0(x[1],'_',x[2])
      }else{
          x[1]
      }
  }
  V(g)$name <- sapply(str_split(V(g)$name, '_'), aux)
  #### carbapenemase
  args <- read.table('../metadata/Bla_Carb.dat', head=F, stringsAsFactors=F)[,1]
  carb <- setNames(rep('No', length(V(g)$name)),V(g)$name)
  carb[names(carb) %in% args] <- 'Yes'
  V(g)$carb <- carb
  
  g
}

## create ellipse data based on 2d scatter
get_ell <- function(ggraph_dat, grp, ellipse_pro = 0.97){
    ## adapted from https://github.com/fawda123/ggord/blob/master/R/ggord.R
    axes = c('x', 'y')
    obs <- select(ggraph_dat, one=x, two=y, Groups=!!grp)
    theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))
    circle <- cbind(cos(theta), sin(theta))
    ell <- plyr::ddply(obs, 'Groups', function(x) {
        if(nrow(x) <= 2) {
            return(NULL)
        }
        sigma <- var(cbind(x$one, x$two))
        mu <- c(mean(x$one), mean(x$two))
        ed <- sqrt(qchisq(ellipse_pro, df = 2))
        data.frame(sweep(circle %*% chol(sigma) * ed, 2, mu, FUN = '+'))
    })
    names(ell)[2:3] <- c('one', 'two')
    ell <- plyr::ddply(ell, plyr::.(Groups), function(x) x[chull(x$one, x$two), ])
    ell
}

Main figure of all genes

g <- get_graph_data()
g_cols <- c('burlywood',  'purple',  'blue', 'darkgreen', 'brown', 'red', 'gold', 'black')
p <- ggraph(g, layout = 'fr')+#, circular=TRUE) +
  geom_edge_arc(aes(width=width, color=community),
                curvature = 0.0,
                alpha=0.7,
                end_cap=circle(1.5, 'mm'), start_cap=circle(1.5, 'mm'))   +
  scale_edge_color_manual(values=g_cols, 
                          na.value = rgb(0.8,0.8,0.8,0.5)) +
  geom_node_point(aes(fill=Type, color=carb),size=5, shape=21) +
  scale_color_manual(values=c('grey','firebrick')) + 
  geom_node_text(aes(label = name), size=5, repel = TRUE, fontface='bold') +
  scale_edge_width(range = c(0.5,3)) +
  scale_fill_manual(values=pal_ucscgb('default')(16)[-c(1,3,4)]) +
  theme_void()

ell <- get_ell(p$data, "community" , 0.9) %>% mutate(Groups=as.factor(Groups))
p <- p +  new_scale_color() +
  geom_polygon(data = ell, 
                 aes_string(x='one', y='two', col='Groups', grp='Groups'),
                 lty=2, fill = NA, lwd=1) +
  scale_color_manual(values=c(g_cols, 'grey', 'gold'), guide=FALSE)
p

ggsave('../plots/fig3a.ar_gene_linkage_graph.pdf', p, width=18, height=20)

Sankey plot

Format data

edge.dat <- data.frame(v=V(g)$name, g=LETTERS[V(g)$community])

genome.info <- read.table('../tables/genome_info.dat', sep = '\t') 
merge(dat, genome.info, by=c(1,2)) %>% ## only take the high/medium quality ones
  select(Species=V1, gene=V6.x) %>% mutate(gene=str_replace(gene, '_.*','')) %>%
  filter(Species!='plasmid') -> sp.dat

sankey.dat <- merge(sp.dat, edge.dat, by.x=2, by.y=1) %>% filter(!is.na(g)) %>% 
  mutate(Species=str_replace(Species,'_',' ')) %>% 
  group_by(Species, g) %>% 
  count %>% 
  mutate(Antibiotics_cluster=as.character(g)) %>% 
  filter(n>2)

Plot

p <- ggplot(sankey.dat,aes(y=log10(n), axis1=Species, axis2=Antibiotics_cluster)) +
  geom_alluvium(aes(fill=Antibiotics_cluster), width=1/5)  +
  geom_stratum(width=1/5, fill=NA) +
  geom_text(stat = "stratum", label.strata = TRUE, size=6, fontface = "italic", nudge_x=-0.12, hjust=1) +
  scale_fill_manual(values=g_cols) +
  scale_x_discrete(limits = c("Species", "Antibiotics Cluster"), expand = c(.5, .05)) +
  theme_void() +
  theme()
p

ggsave('../plots/fig3b.ar_gene_linkage_sankey.pdf', p, width=18, height=18)

Supplementary figures

Staphylococcus aureus network

g <- get_graph_data('Staphylococcus_aureus')
p <- ggraph(g, layout = 'fr')+#, circular=TRUE) +
  geom_edge_arc(aes(width=width, color=community),
                curvature = 0.0,
                alpha=0.7,
                end_cap=circle(1.5, 'mm'), start_cap=circle(1.5, 'mm'))   +
  scale_edge_color_manual(values=g_cols, 
                          na.value = rgb(0.8,0.8,0.8,0.5)) +
  geom_node_point(aes(fill=Type, color=carb),size= as.numeric(as.factor(V(g)$carb) )*5, shape=21) +
  scale_color_manual(values=c('grey','firebrick')) + 
  geom_node_text(aes(label = name), size=5, repel = TRUE, fontface='bold') +
  scale_edge_width(range = c(0.5,3)) +
  scale_fill_manual(values=pal_ucscgb('default')(16)[-c(1,3,4)]) +
  theme_void()
p

ggsave('../plots/sup11.staph_aureus_ar_gene_linkage_graph.pdf', p, width=5, height=5)

Genome network

g <- get_graph_data('plasmid', reverse = TRUE)
p <- ggraph(g, layout = 'fr')+#, circular=TRUE) +
  geom_edge_arc(aes(width=width, color=community),
                curvature = 0.0,
                alpha=0.7,
                end_cap=circle(1.5, 'mm'), start_cap=circle(1.5, 'mm'))   +
  scale_edge_color_manual(values=g_cols, 
                          na.value = rgb(0.8,0.8,0.8,0.5)) +
  geom_node_point(aes(fill=Type, color=carb),size= as.numeric(as.factor(V(g)$carb) )*5, shape=21) +
  scale_color_manual(values=c('grey','firebrick')) + 
  geom_node_text(aes(label = name), size=5, repel = TRUE, fontface='bold') +
  scale_edge_width(range = c(0.5,3)) +
  scale_fill_manual(values=pal_ucscgb('default')(16)[-c(1,3,4)]) +
  theme_void()
p

ggsave('../plots/sup12.genome_ar_gene_linkage_graph.pdf', p, width=16, height=16)

Plasmid network

g <- get_graph_data('plasmid')
p <- ggraph(g, layout = 'fr')+#, circular=TRUE) +
  geom_edge_arc(aes(width=width, color=community),
                curvature = 0.0,
                alpha=0.7,
                end_cap=circle(1.5, 'mm'), start_cap=circle(1.5, 'mm'))   +
  scale_edge_color_manual(values=g_cols, 
                          na.value = rgb(0.8,0.8,0.8,0.5)) +
  geom_node_point(aes(fill=Type, color=carb),size= as.numeric(as.factor(V(g)$carb) )*5, shape=21) +
  scale_color_manual(values=c('grey','firebrick')) + 
  geom_node_text(aes(label = name), size=5, repel = TRUE, fontface='bold') +
  scale_edge_width(range = c(0.5,3)) +
  scale_fill_manual(values=pal_ucscgb('default')(16)[-c(1,3,4)]) +
  theme_void()
p

ggsave('../plots/sup12.plasmid_ar_gene_linkage_graph.pdf', p, width=10, height=7)

Session informaton

sessionInfo()
## R version 3.4.4 (2018-03-15)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: CentOS release 6.9 (Final)
## 
## Matrix products: default
## BLAS: /usr/lib64/R/lib/libRblas.so
## LAPACK: /usr/lib64/R/lib/libRlapack.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] ggnewscale_0.1.9000 ggalluvial_0.9.1    ggraph_1.0.2       
##  [4] igraph_1.2.2        readr_1.1.1         foreach_1.4.4      
##  [7] ggsci_2.9           reshape2_1.4.3      stringr_1.3.0      
## [10] tibble_2.0.1        tidyr_0.8.2         dplyr_0.8.0.1      
## [13] gridExtra_2.3       ggplot2_3.1.0      
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_0.2.5  purrr_0.3.0       colorspace_1.3-2 
##  [4] htmltools_0.3.6   viridisLite_0.3.0 yaml_2.1.18      
##  [7] utf8_1.1.4        rlang_0.3.1       pillar_1.3.1     
## [10] glue_1.3.0        withr_2.1.2       tweenr_1.0.0     
## [13] plyr_1.8.4        munsell_0.5.0     gtable_0.2.0     
## [16] codetools_0.2-15  evaluate_0.10.1   labeling_0.3     
## [19] knitr_1.20        fansi_0.4.0       Rcpp_1.0.0       
## [22] scales_1.0.0      backports_1.1.2   farver_1.0       
## [25] ggforce_0.1.3     hms_0.4.2         digest_0.6.18    
## [28] stringi_1.3.1     ggrepel_0.8.0     rprojroot_1.3-2  
## [31] cli_1.0.1         tools_3.4.4       magrittr_1.5     
## [34] lazyeval_0.2.1    crayon_1.3.4      pkgconfig_2.0.2  
## [37] MASS_7.3-49       assertthat_0.2.0  rmarkdown_1.9    
## [40] iterators_1.0.9   viridis_0.5.1     R6_2.4.0         
## [43] units_0.6-1       compiler_3.4.4